import numpy as np
import pandas as pd

data = pd.read_csv('data/processed/tep_data.csv', index_col='Index')
print(f'Len of dataset: {data.shape[0]}')
Len of dataset: 12801
from src.models.autoencoder import build_autoencoder
import keras
create_params = dict(
    input_shape=window_length,
    hidden_layer_size=16,
    hidden_layer_activation=None,
    reg_strength=0.001,
    input_dropout=-1,
)
compile_params = dict(
    optimizer=keras.optimizers.Adam(lr=0.001),
    loss='mse',
)
fit_params = dict(
    batch_size=64,
    epochs=100,
    verbose=1,
    callbacks=[keras.callbacks.ReduceLROnPlateau(patience=5),
               keras.callbacks.EarlyStopping(min_delta=0.01, patience=15)],
)
model_fn = lambda: build_autoencoder(create_params, compile_params)
Using TensorFlow backend.
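build_autoencoder lives in the project's src.models.autoencoder module and its source is not shown in this section. A minimal sketch of what such a builder might look like given the parameters above (the layer structure here is an assumption, not the project's actual code):

from keras.layers import Dense, Dropout, Input
from keras.models import Model
from keras.regularizers import l2

def build_autoencoder_sketch(create_params, compile_params):
    # Hypothetical reconstruction: a single-bottleneck autoencoder.
    inp = Input(shape=(create_params['input_shape'],))
    x = inp
    if create_params['input_dropout'] > 0:  # -1 presumably disables input dropout
        x = Dropout(create_params['input_dropout'])(x)
    encoded = Dense(create_params['hidden_layer_size'],
                    activation=create_params['hidden_layer_activation'],
                    kernel_regularizer=l2(create_params['reg_strength']))(x)
    decoded = Dense(create_params['input_shape'], activation=None)(encoded)
    model = Model(inputs=inp, outputs=decoded)
    model.compile(**compile_params)
    return model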
from src.utils import cross_validate, rolling_window  # rolling_window assumed to live in src.utils

# Slide a window of length window_length over the single-component series.
X_comp = rolling_window(X_one_component.to_numpy()[:, None],
                        window_length=window_length).squeeze()
results = cross_validate(model_fn, 3, fit_params, X_comp, X_comp)
Fold 0... Train on 3185 samples, validate on 3184 samples
Epoch 1/100 - loss: 1.0796 - val_loss: 0.6876
...
Epoch 36/100 - loss: 0.1213 - val_loss: 0.1237
Fold 1... Train on 6369 samples, validate on 3184 samples
Epoch 1/100 - loss: 0.3681 - val_loss: 0.1769
...
Epoch 20/100 - loss: 0.1187 - val_loss: 0.1145
Fold 2... Train on 9553 samples, validate on 3184 samples
Epoch 1/100 - loss: 0.2905 - val_loss: 0.1618
...
Epoch 19/100 - loss: 0.1168 - val_loss: 0.1179
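The growing training sizes across folds (3185 → 6369 → 9553 samples against a fixed 3184-sample validation block) suggest an expanding-window, time-ordered split. A sketch of what such a cross_validate helper could look like (an assumption; the project's actual implementation in src.utils is not shown):

def cross_validate_sketch(model_fn, n_folds, fit_params, X, y):
    """Expanding-window CV: each fold trains on all data before its validation block."""
    results = []
    fold_size = len(X) // (n_folds + 1)
    for fold in range(n_folds):
        print(f'Fold {fold}...')
        train_end = fold_size * (fold + 1)
        val_end = train_end + fold_size
        model = model_fn()  # fresh model per fold
        history = model.fit(X[:train_end], y[:train_end],
                            validation_data=(X[train_end:val_end], y[train_end:val_end]),
                            **fit_params)
        results.append(history.history)
    return results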
from src.visualization.visualize import visualize_cv_result
visualize_cv_result(results, 'Autoencoder loss')
from keras.layers import Input
from keras.models import Model, load_model

# Load the trained autoencoder and expose its bottleneck as a standalone encoder.
model = load_model('models/onedimensional_autoencoder.h5')
inp = Input(shape=(window_length,))
encoder_layer = model.layers[0]  # first layer is assumed to be the encoding Dense layer
encoded_tensor = encoder_layer(inp)
encoder = Model(inputs=inp, outputs=encoded_tensor)
from src.data.generate import generate_anomalies

# Inject synthetic anomalies into the series of the selected component.
data_with_anom = data[str(component)].copy()
anom_amount = 100
anom_idxs_start = np.random.choice(len(data_with_anom),
                                   anom_amount,
                                   replace=False)
anom_lens = np.zeros(anom_amount, dtype=int)
anom_idxs = np.zeros(len(data_with_anom), dtype=int)  # point-wise ground-truth mask
for i, idx in enumerate(anom_idxs_start):
    l = np.random.randint(window_length, window_length * 2)
    if idx + l > data_with_anom.shape[0]:
        raise IndexError('Anomaly segment runs past the end of the series; re-run the cell to resample start indices')
    anom_lens[i] = l
    anoms = generate_anomalies(l)
    anom_idxs[idx:idx + l] = 1
    data_with_anom[idx:idx + l] = anoms[np.random.randint(len(anoms))]
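Before clustering, the anomaly-injected series has to be windowed and passed through the encoder to obtain the encoded matrix used below. That step does not appear in this section, so here is a minimal sketch of it, assuming the rolling_window helper used earlier:

X_anom = rolling_window(data_with_anom.to_numpy()[:, None],
                        window_length=window_length).squeeze()
encoded = encoder.predict(X_anom)  # encoded representation of each window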
Let's find the optimal number of clusters using the elbow method.
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
kmeans = KMeans()
visualizer = KElbowVisualizer(kmeans, k=(2,10), metric='distortion')
visualizer.fit(encoded)
visualizer.poof()
from yellowbrick.cluster import InterclusterDistance
km = KMeans(3)
intra_visualizer = InterclusterDistance(km)
intra_visualizer.fit(encoded) # Fit the data to the visualizer
intra_visualizer.poof() # Draw/show/poof the data
We will treat clusters 0 and 2 as entirely anomalous (the two smallest clusters); the code below generalizes this by labelling everything outside the largest cluster as anomalous.
pred = km.predict(encoded)
maximal_cluster = pd.Series(pred).value_counts().idxmax()  # largest cluster = normal behaviour
anoms_pred = np.where(pred != maximal_cluster)[0]          # window indices flagged as anomalous
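The next cell scores the detections with intersection_over_true from src.utils, whose source is not shown here. Judging by its arguments, it likely expands the true anomaly segments and the predicted window indices into point-wise binary masks and then applies the given sklearn metric; a sketch under that assumption:

def intersection_over_true_sketch(n_points, true_starts, true_lens,
                                  pred_window_idxs, window_length, metric):
    # Point-wise ground-truth mask from anomaly segments.
    y_true = np.zeros(n_points, dtype=int)
    for start, length in zip(true_starts, true_lens):
        y_true[start:start + length] = 1
    # Point-wise prediction mask: each flagged window covers window_length points.
    y_pred = np.zeros(n_points, dtype=int)
    for w in pred_window_idxs:
        y_pred[w:w + window_length] = 1
    return metric(y_true, y_pred)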
from sklearn.metrics import recall_score, precision_score
from src.utils import intersection_over_true
recall = intersection_over_true(data.shape[0], anom_idxs_start, anom_lens, anoms_pred, window_length, recall_score)
precision = intersection_over_true(data.shape[0], anom_idxs_start, anom_lens, anoms_pred, window_length, precision_score)
print(f'Recall={recall:.3f}, Precision={precision:.3f}')
Recall=0.216, Precision=0.240
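The figure object p rendered below is built in a cell that is not included in this section. A hypothetical reconstruction with bokeh (variable names, styling, and the window-to-point expansion are assumptions), matching the colour coding described in the comments:

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

series = data_with_anom.to_numpy()
x = np.arange(len(series))

# Expand predicted window indices into a point-wise mask.
pred_mask = np.zeros(len(series), dtype=bool)
for w in anoms_pred:
    pred_mask[w:w + window_length] = True
true_mask = anom_idxs.astype(bool)

p = figure(title='Predicted vs. missed anomalies')
p.line(x, series, color='green')                                 # normal data
p.circle(x[pred_mask], series[pred_mask], color='blue', size=2)  # predicted anomalies
p.circle(x[true_mask & ~pred_mask], series[true_mask & ~pred_mask],
         color='red', size=2)                                    # missed anomalies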
# Blue - anomalies predicted by the model
# Red - anomalies that were not caught by the model
# Green - original data that is neither an anomaly nor predicted as one
show(p)